# Table_1.R

# Part of the replication archive for 
#
#   Bullock, John G. 2020. "Education and Attitudes toward Redistribution in
#   the United States." British Journal of Political Science 50.


# This file produces Table 1 in the article: "Effects of attendance laws on 
# college enrollment and parental education."



library(Bullock, lib.loc = c(.libPaths(), 'packageLibrary'))       # for qw()
library(lmtest)        # coeftest() helps with clustered standard errors
library(multiwayvcov)  # for clustered standard errors with lm objects
library(stringr)       # for str_pad()
source("IV_setup.R")
# Path stem (directory plus base name, no extension) handed to the table
# writer at the end of this script.
filenameStem <- 'float_output/Table_1'

# Apply getFirstStage() to every object stored in IVModelsEnv, forwarding
# 'educ' as the `endog` argument.  The result is a list with one element per
# object in that environment.
# NOTE(review): presumably each element is a first-stage model formula -- the
# code below calls update() and all.vars() on one of them; confirm in
# IV_setup.R.
firstStageModels <- eapply(
  IVModelsEnv,
  getFirstStage,
  endog = 'educ')



##############################################################################
# ESTIMATE COLLEGE ENROLLMENT REGRESSIONS
##############################################################################
# Baseline first-stage specification (eqwlth.mod1) with the outcome replaced
# by `collegeAttended`.  Every college-enrollment model is fit from it.
colAttend <- update(firstStageModels$eqwlth.mod1, collegeAttended ~ .)

# Estimation samples: keep only the variables that appear in the formula and
# drop incomplete rows up front, so that each full model and its restricted
# counterpart are fit to exactly the same observations.
ANES.df.colAttend   <- na.omit(ANES.df[, all.vars(colAttend)])
GSS.df.colAttend    <- na.omit(GSS.df[, all.vars(colAttend)])
pooled.df.colAttend <- rbind(GSS.df.colAttend, ANES.df.colAttend)

# Full models.  The ANES fit is the template; the GSS and pooled fits re-run
# the same call on a different data frame.  singular.ok = FALSE makes lm()
# stop rather than silently drop aliased coefficients.
colAttend.ANES <- lm(
  colAttend,
  data        = ANES.df.colAttend,
  singular.ok = FALSE)
colAttend.GSS <- update(
  colAttend.ANES,
  data        = GSS.df.colAttend,
  singular.ok = FALSE)
colAttend.pooled <- update(
  colAttend.ANES,
  data        = pooled.df.colAttend,
  singular.ok = FALSE)

# Restricted models (suffix ".r"): identical except that CA.fac is removed
# from the right-hand side.  They are used later for the no-instrument fit
# statistics and for the instrument-exclusion F tests.
colAttend.ANES.r <- update(
  colAttend.ANES,
  . ~ . - CA.fac,
  singular.ok = FALSE)
colAttend.GSS.r <- update(
  colAttend.ANES.r,
  data        = GSS.df.colAttend,
  singular.ok = FALSE)
colAttend.pooled.r <- update(
  colAttend.ANES.r,
  data        = pooled.df.colAttend,
  singular.ok = FALSE)

# Fit statistics for the college-enrollment models.  Every vector is ordered
# pooled, ANES, GSS.  "NoInstruments" variants come from the restricted (".r")
# models, which omit CA.fac.

# R-squared, kept numeric.
colAttend.R2               <- c(
  summary(colAttend.pooled)$r.squared,
  summary(colAttend.ANES)$r.squared,
  summary(colAttend.GSS)$r.squared)
colAttend.R2NoInstruments  <- c(
  summary(colAttend.pooled.r)$r.squared,
  summary(colAttend.ANES.r)$r.squared,
  summary(colAttend.GSS.r)$r.squared)

# Standard error of the regression (residual standard error).
colAttend.SER              <- c(
  summary(colAttend.pooled)$sigma,
  summary(colAttend.ANES)$sigma,
  summary(colAttend.GSS)$sigma)
colAttend.SERNoInstruments <- c(
  summary(colAttend.pooled.r)$sigma,
  summary(colAttend.ANES.r)$sigma,
  summary(colAttend.GSS.r)$sigma)

# F statistic for excluding CA.fac: the second row of the two-model anova
# table compares the full model with the restricted one.
colAttend.Fstat <- c(
  anova(colAttend.pooled, colAttend.pooled.r)$F[2],
  anova(colAttend.ANES,   colAttend.ANES.r)$F[2],
  anova(colAttend.GSS,    colAttend.GSS.r)$F[2])

# Number of observations used in each full model.
colAttend.N <- c(nobs(colAttend.pooled), nobs(colAttend.ANES), nobs(colAttend.GSS))

# Convert to display strings with exactly two decimal places.  sprintf() is
# used instead of as.character() + str_pad(): right-padding with zeros to
# width 4 turned a value that rounds to an integer ("3") into "3000" and left
# values such as "12.5" one digit short, and as.character(round(x, 2)) drops
# trailing zeros (".5" rather than ".50").  The SER strings additionally lose
# their leading zero (".45" rather than "0.45").
colAttend.SER              <- sub('^0\\.', '.', sprintf('%.2f', round(colAttend.SER, 2)))
colAttend.SERNoInstruments <- sub('^0\\.', '.', sprintf('%.2f', round(colAttend.SERNoInstruments, 2)))
colAttend.Fstat            <- sprintf('%.2f', round(colAttend.Fstat, 2))



##############################################################################
# PARENTAL-EDUCATION PLACEBO TESTS (BASELINE MODEL ONLY)
##############################################################################

# Predicting whether at least one parent graduated from high school.
# The baseline specification is refit with `parents.minOneHS` as the outcome,
# using GSS data only, after listwise deletion on the variables in the
# formula.
parPlaceboNoHS       <- update(firstStageModels$eqwlth.mod1, parents.minOneHS ~ .)
parPlaceboNoHS.df    <- na.omit(GSS.df[, all.vars(parPlaceboNoHS)])
parPlaceboNoHS.lm1   <- lm(parPlaceboNoHS, data = parPlaceboNoHS.df, singular.ok = FALSE)

# Restricted model: same call with CA.fac dropped (singular.ok = FALSE is
# inherited from the original call).
parPlaceboNoHS.r     <- update(parPlaceboNoHS.lm1, . ~ . - CA.fac)

# R-squared stays numeric; it is rounded when the table footer is assembled.
parPlaceboNoHS.R2    <- summary(parPlaceboNoHS.lm1)$r.squared

# Standard errors of the regression as two-decimal strings without the
# leading zero.  sprintf() keeps trailing zeros (".50"), which
# as.character(round(x, 2)) would drop.
parPlaceboNoHS.SER   <- summary(parPlaceboNoHS.lm1)$sigma
parPlaceboNoHS.SER   <- sub('^0\\.', '.', sprintf('%.2f', round(parPlaceboNoHS.SER, 2)))
parPlaceboNoHS.SERNoInstruments <- summary(parPlaceboNoHS.r)$sigma
parPlaceboNoHS.SERNoInstruments <- sub('^0\\.', '.', sprintf('%.2f', round(parPlaceboNoHS.SERNoInstruments, 2)))

# F statistic for excluding CA.fac, as a two-decimal string.  sprintf()
# replaces right-padding with zeros to width 4, which produced "3000" for a
# value that rounds to 3 and "12.5" for 12.50.
parPlaceboNoHS.Fstat <- anova(parPlaceboNoHS.lm1, parPlaceboNoHS.r)$F[2]
parPlaceboNoHS.Fstat <- sprintf('%.2f', round(parPlaceboNoHS.Fstat, 2))

parPlaceboNoHS.N     <- nobs(parPlaceboNoHS.lm1)

# Predicting whether at least one parent graduated from college.
# The baseline specification is refit with `parents.minOneBA` as the outcome,
# using GSS data only, after listwise deletion on the variables in the
# formula.
parPlaceboNoBA       <- update(firstStageModels$eqwlth.mod1, parents.minOneBA ~ .)
parPlaceboNoBA.df    <- na.omit(GSS.df[, all.vars(parPlaceboNoBA)])
parPlaceboNoBA.lm1   <- lm(parPlaceboNoBA, data = parPlaceboNoBA.df, singular.ok = FALSE)

# Restricted model: same call with CA.fac dropped (singular.ok = FALSE is
# inherited from the original call).
parPlaceboNoBA.r     <- update(parPlaceboNoBA.lm1, . ~ . - CA.fac)

# R-squared stays numeric; it is rounded when the table footer is assembled.
parPlaceboNoBA.R2    <- summary(parPlaceboNoBA.lm1)$r.squared

# Standard errors of the regression as two-decimal strings without the
# leading zero.  sprintf() keeps trailing zeros (".50"), which
# as.character(round(x, 2)) would drop.
parPlaceboNoBA.SER   <- summary(parPlaceboNoBA.lm1)$sigma
parPlaceboNoBA.SER   <- sub('^0\\.', '.', sprintf('%.2f', round(parPlaceboNoBA.SER, 2)))
parPlaceboNoBA.SERNoInstruments <- summary(parPlaceboNoBA.r)$sigma
parPlaceboNoBA.SERNoInstruments <- sub('^0\\.', '.', sprintf('%.2f', round(parPlaceboNoBA.SERNoInstruments, 2)))

# F statistic for excluding CA.fac, as a two-decimal string.  sprintf()
# replaces right-padding with zeros to width 4, which produced "3000" for a
# value that rounds to 3 and "12.5" for 12.50.
parPlaceboNoBA.Fstat <- anova(parPlaceboNoBA.lm1, parPlaceboNoBA.r)$F[2]
parPlaceboNoBA.Fstat <- sprintf('%.2f', round(parPlaceboNoBA.Fstat, 2))

parPlaceboNoBA.N     <- nobs(parPlaceboNoBA.lm1)

# Combine the two placebo regressions' statistics into length-2 vectors,
# ordered high-school outcome first, college outcome second.  R2 and N are
# numeric; SER and Fstat are already display strings.
parPlacebo.R2 <- c(
  parPlaceboNoHS.R2,
  parPlaceboNoBA.R2)
parPlacebo.SER <- c(
  parPlaceboNoHS.SER,
  parPlaceboNoBA.SER)
parPlacebo.Fstat <- c(
  parPlaceboNoHS.Fstat,
  parPlaceboNoBA.Fstat)
parPlacebo.N <- c(
  parPlaceboNoHS.N,
  parPlaceboNoBA.N)



##############################################################################
# MAKE REGRESSION TABLE
##############################################################################
# Models in table-column order: three college-enrollment regressions
# (pooled, ANES, GSS) followed by the two parental-education placebo
# regressions.
regObjList <- list(
  colAttend.pooled,
  colAttend.ANES,
  colAttend.GSS,
  parPlaceboNoHS.lm1,
  parPlaceboNoBA.lm1)

# Build the matrix of estimates and clustered standard errors.
#
# clusterVar holds one clustering factor per model: the interaction of
# stateYoung with yearYoung (converted to a factor), taken from the model
# frame stored in each fit, with unused levels dropped.  lapply() is used
# rather than sapply() so that the result is always a list of factors;
# sapply() only returns a list when the element lengths differ and would
# otherwise collapse the factors into a matrix.
#
# rowsToRemove lists the coefficient rows to leave out of the table.
# NOTE(review): the entries look like name fragments matched against
# coefficient names -- confirm against regTable()'s documentation.
robustnessRegTable <- regTable(
  objList      = regObjList,
  colNames     = NULL,
  clusterSEs   = TRUE,
  clusterVar   = lapply(regObjList, function (x) {
    droplevels(x$model$stateYoung:factor(x$model$yearYoung))
  }),
  rowsToRemove = qw('female race born state year age (Intercept)'))



##############################################################################
# MAKE LATEX TABLE
##############################################################################
# Labels for the coefficient rows that remain in the table.
latexRowNames <- c(
  'Moderate law',
  'Strict law')

# Footer rows: a label followed by one entry per table column (three
# college-enrollment models, then two placebo models).
#
# The R^2 entries are formatted with sprintf() so that every value shows
# exactly two decimals; coercing round(x, 2) to character drops trailing
# zeros (".3" instead of ".30").  The leading zero is then stripped, matching
# the standard-error row.
latexFooterRows <- list(
  c('$R^2$',
    sub('^0\\.', '.', sprintf('%.2f', round(c(colAttend.R2, parPlacebo.R2), 2)))),
  c('Std. error of regression',             colAttend.SER,   parPlacebo.SER),
  c('\\textit{F} for instrument exclusion', colAttend.Fstat, parPlacebo.Fstat),
  c('Number of observations',               colAttend.N,     parPlacebo.N))
# Column headings, one character vector per heading line (top to bottom),
# each with one entry per model column.
latexColNames <- list(
  c(rep('', 3),           rep('parent', 2)),
  c(rep('enrolled', 3),   rep('graduated', 2)),
  c(rep('in college', 3), 'from HS', 'from college'),
  c('(pooled)', '(ANES)', '(GSS)', '(GSS)', '(GSS)'))

# Float caption (LaTeX source).
latexCaption <- '\\textit{Effects of attendance laws on college enrollment and parental education.}  Cell entries are OLS estimates and standard errors.  ``Moderate\'\' and ``strict\'\' laws are as defined in the ``Data\'\' section.  The outcomes are binary variables indicating whether one ever enrolled in college, or whether either of one\'s parents graduated from high school or college.  All variables whose estimates are reported are coded~0 or~1.  All regressions include the control variables and fixed effects that are in the baseline model (\\autoref{Eq_IVSecondStage}).  Standard errors are clustered at the state-year level.  Parental education data are from the GSS alone; the ANES does not collect information on parental education.'

# Turn the regression matrix into LaTeX.  The result is indexed and edited
# line by line further down, so it is handled as a vector of LaTeX lines.
robustnessLatexTable <- latexTable(
  mat                     = robustnessRegTable,
  starredFloat            = FALSE,
  rowNames                = latexRowNames,
  colNames                = latexColNames,
  footerRows              = latexFooterRows,
  headerFooter            = TRUE,
  spaceBetweenColnameRows = FALSE,
  decimalPlaces           = 3,
  # Spacer before the row names and after each standard-error column.
  spacerColumns           = seq(0, ncol(robustnessRegTable) - 1, by = 2),
  hspace                  = '-.375in',
  caption                 = latexCaption,
  commandName             = 'TabIVCollegeEnrollmentAndParentalPlacebo',
  callCommand             = FALSE)

# Tweak the formatting of the last column: in the final line of the generated
# LaTeX that matches the column specification below, widen the leading
# \hspace from 0em to .5em.
colSpecRows    <- grep('>{{\\hspace*{0em}}}N{1}{3}%', robustnessLatexTable, fixed = TRUE)
lastColSpecRow <- colSpecRows[length(colSpecRows)]
if (length(lastColSpecRow) == 0) {
  # Without this check a non-matching pattern would silently leave the table
  # untouched, e.g. if the column specification emitted by latexTable() ever
  # changes.
  warning(
    'Column specification not found in robustnessLatexTable; ',
    'spacing of the last column was not adjusted.',
    call. = FALSE)
} else {
  robustnessLatexTable[lastColSpecRow] <- sub(
    pattern     = '\\hspace*{0em}',
    replacement = '\\hspace*{.5em}',
    x           = robustnessLatexTable[lastColSpecRow],
    fixed       = TRUE)
}

# Write the LaTeX table to a file.  Output goes to the path stem in
# `filenameStem`; any existing output is overwritten.  Only the .tex file is
# kept -- the PDF is neither kept nor opened.
latexTablePDF(
  latexCommands = list(robustnessLatexTable),
  firstPageEmpty = FALSE,
  continuedFloat = FALSE,
  wrapper = FALSE,  # FALSE if inserting LaTeX tables into appendix of my paper
  outputFilenameStem = filenameStem,
  overwriteExisting = TRUE,
  keepPDFFile = FALSE,
  keepTexFile = TRUE,
  openPDFOnExit = FALSE)
